"""
常量定义
"""

# 支持的文件格式
LOADER_DICT = {
    "UnstructuredHTMLLoader": [".html", ".htm"],
    "MHTMLLoader": [".mhtml"],
    "TextLoader": [".md", ".txt"],
    "UnstructuredMarkdownLoader": [".md"],
    "JSONLoader": [".json"],
    "JSONLinesLoader": [".jsonl"],
    "CSVLoader": [".csv"],
    "RapidOCRPDFLoader": [".pdf"],
    "RapidOCRDocLoader": [".docx"],
    "RapidOCRPPTLoader": [".ppt", ".pptx"],
    "RapidOCRLoader": [".png", ".jpg", ".jpeg", ".bmp"],
    "UnstructuredFileLoader": [
        ".eml", ".msg", ".rst", ".rtf", ".xml", 
        ".epub", ".odt", ".tsv"
    ],
    "UnstructuredEmailLoader": [".eml", ".msg"],
    "UnstructuredEPubLoader": [".epub"],
    "UnstructuredExcelLoader": [".xlsx", ".xls"],
    "NotebookLoader": [".ipynb"],
    "UnstructuredODTLoader": [".odt"],
    "PythonLoader": [".py"],
    "UnstructuredRSTLoader": [".rst"],
    "UnstructuredRTFLoader": [".rtf"],
    "SRTLoader": [".srt"],
    "TomlLoader": [".toml"],
    "UnstructuredTSVLoader": [".tsv"],
    "UnstructuredWordDocumentLoader": [".docx"],
    "UnstructuredXMLLoader": [".xml"],
    "UnstructuredPowerPointLoader": [".ppt", ".pptx"],
    "EverNoteLoader": [".enex"],
}

# 支持的向量存储类型
SUPPORTED_VS_TYPES = {
    "FAISS": "faiss",
    "MILVUS": "milvus", 
    "PG": "pg",
    "CHROMADB": "chromadb",
    "ES": "es",
    "ZILLIZ": "zilliz",
    "RELYT": "relyt",
}

# 文本分割器配置
TEXT_SPLITTER_DICT = {
    "ChineseRecursiveTextSplitter": {
        "source": "",
        "tokenizer_name_or_path": "",
    },
    "SpacyTextSplitter": {
        "source": "huggingface",
        "tokenizer_name_or_path": "gpt2",
    },
    "RecursiveCharacterTextSplitter": {
        "source": "tiktoken",
        "tokenizer_name_or_path": "cl100k_base",
    },
    "MarkdownHeaderTextSplitter": {
        "headers_to_split_on": [
            ("#", "head1"),
            ("##", "head2"),
            ("###", "head3"),
            ("####", "head4"),
        ]
    },
}

# 默认配置
DEFAULT_CHUNK_SIZE = 750
DEFAULT_OVERLAP_SIZE = 150
DEFAULT_TOP_K = 3
DEFAULT_SCORE_THRESHOLD = 0.5
DEFAULT_EMBEDDING_MODEL = "text2vec-chinese"
DEFAULT_VECTOR_STORE_TYPE = "faiss"
DEFAULT_STORAGE_PATH = "./data" 